// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.archivepatcher.generator;

import com.google.archivepatcher.shared.DefaultDeflateCompatibilityWindow;
import com.google.archivepatcher.shared.JreDeflateParameters;
import com.google.archivepatcher.shared.MultiViewInputStreamFactory;
import com.google.archivepatcher.shared.RandomAccessFileInputStream;
import com.google.archivepatcher.shared.RandomAccessFileInputStreamFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import java.util.zip.ZipException;

/**
 * Divines information about the compression used for a resource that has been compressed with a
 * deflate-compatible algorithm. This implementation produces results that are valid within the
 * {@link DefaultDeflateCompatibilityWindow}.
 */
public class DefaultDeflateCompressionDiviner {

  /**
   * The levels to try for each strategy, in the order to attempt them.
   */
  private final Map<Integer, List<Integer>> levelsByStrategy = getLevelsByStrategy();

  /**
   * A simple struct that contains a {@link MinimalZipEntry} describing a specific entry from a zip
   * archive along with an optional accompanying {@link JreDeflateParameters} describing the
   * original compression settings that were used to generate the compressed data in that entry.
   */
  public static class DivinationResult {
    /**
     * The {@link MinimalZipEntry} for the result; never null.
     */
    public final MinimalZipEntry minimalZipEntry;

    /**
     * The {@link JreDeflateParameters} for the result, possibly null. This value is only set if
     * {@link MinimalZipEntry#isDeflateCompressed()} is true <em>and</em> the compression settings
     * were successfully divined.
     */
    public final JreDeflateParameters divinedParameters;

    /**
     * Creates a new result with the specified fields.
     * @param minimalZipEntry the zip entry
     * @param divinedParameters the parameters
     */
    public DivinationResult(
        MinimalZipEntry minimalZipEntry, JreDeflateParameters divinedParameters) {
      if (minimalZipEntry == null) {
        throw new IllegalArgumentException("minimalZipEntry cannot be null");
      }
      this.minimalZipEntry = minimalZipEntry;
      this.divinedParameters = divinedParameters;
    }
  }

  /**
   * Load the specified archive and attempt to divine deflate parameters for all entries within.
   * @param archiveFile the archive file to work on
   * @return a list of results for each entry in the archive, in file order (not central directory
   * order). There is exactly one result per entry, regardless of whether or not that entry is
   * compressed. Callers can filter results by checking
   * {@link MinimalZipEntry#getCompressionMethod()} to see if the result is or is not compressed,
   * and by checking whether a non-null {@link JreDeflateParameters} was obtained.
   * @throws IOException if unable to read or parse the file
   * @see DivinationResult 
   */
  public List<DivinationResult> divineDeflateParameters(File archiveFile) throws IOException {
    List<DivinationResult> results = new ArrayList<DivinationResult>();
    for (MinimalZipEntry minimalZipEntry : MinimalZipArchive.listEntries(archiveFile)) {
      JreDeflateParameters divinedParameters = null;
      if (minimalZipEntry.isDeflateCompressed()) {
        // TODO(andrewhayden): Reuse streams to avoid churning file descriptors
        RandomAccessFileInputStreamFactory rafisFactory =
            new RandomAccessFileInputStreamFactory(
                archiveFile,
                minimalZipEntry.getFileOffsetOfCompressedData(),
                minimalZipEntry.getCompressedSize());
        divinedParameters = divineDeflateParameters(rafisFactory);
      }
      results.add(new DivinationResult(minimalZipEntry, divinedParameters));
    }
    return results;
  }

  /**
   * Returns an unmodifiable map whose keys are deflate strategies and whose values are the levels
   * that make sense to try with the corresponding strategy, in the recommended testing order.
   *
   * <ul>
   *   <li>For strategy 0, levels 1 through 9 (inclusive) are included.
   *   <li>For strategy 1, levels 4 through 9 (inclusive) are included. Levels 1, 2 and 3 are
   *       excluded because they behave the same under strategy 0.
   *   <li>For strategy 2, only level 1 is included because the level is ignored under strategy 2.
   * </ul>
   *
   * @return such a mapping
   */
  protected Map<Integer, List<Integer>> getLevelsByStrategy() {
    final Map<Integer, List<Integer>> levelsByStrategy = new HashMap<Integer, List<Integer>>();
    // The best order for the levels is simply the order of popularity in the world, which is
    // expected to be default (6), maximum compression (9), and fastest (1).
    // The rest of the levels are rarely encountered and their order is mostly irrelevant.
    levelsByStrategy.put(0, Collections.unmodifiableList(Arrays.asList(6, 9, 1, 4, 2, 3, 5, 7, 8)));
    levelsByStrategy.put(1, Collections.unmodifiableList(Arrays.asList(6, 9, 4, 5, 7, 8)));
    levelsByStrategy.put(2, Collections.singletonList(1));
    return Collections.unmodifiableMap(levelsByStrategy);
  }

  /**
   * Determines the original {@link JreDeflateParameters} that were used to compress a given piece
   * of deflated data.
   * @param compressedDataInputStreamFactory a {@link MultiViewInputStreamFactory} that can provide
   * multiple independent {@link InputStream} instances for the compressed data; the streams
   * produced must support {@link InputStream#mark(int)} and it is recommended that
   * {@link RandomAccessFileInputStream} instances be provided for efficiency if a backing file is
   * available. The stream will be reset for each recompression attempt that is required.
   * @return the parameters that can be used to replicate the compressed data in the
   * {@link DefaultDeflateCompatibilityWindow}, if any; otherwise <code>null</code>. Note that
   * <code>null</code> is also returned in the case of <em>corrupt</em> zip data since, by
   * definition, it cannot be replicated via any combination of normal deflate parameters.
   * @throws IOException if there is a problem reading the data, i.e. if the file contents are
   * changed while reading
   */
  public JreDeflateParameters divineDeflateParameters(
      MultiViewInputStreamFactory<?> compressedDataInputStreamFactory) throws IOException {
    InputStream compressedDataIn = compressedDataInputStreamFactory.newStream();
    if (!compressedDataIn.markSupported()) {
      try {
        compressedDataIn.close();
      } catch (Exception ignored) {
        // Nothing to do.
      }
      throw new IllegalArgumentException("input stream must support mark(int)");
    }

    // Record the input stream position to return to it each time a prediction is needed.
    compressedDataIn.mark(0); // The argument to mark is ignored and irrelevant

    // Make a copy of the stream for matching bytes of compressed input
    InputStream matchingCompressedDataIn = compressedDataInputStreamFactory.newStream();
    matchingCompressedDataIn.mark(0); // The argument to mark is ignored and irrelevant

    byte[] copyBuffer = new byte[32768];
    try {
      // Iterate over all relevant combinations of nowrap, strategy and level.
      for (boolean nowrap : new boolean[] {true, false}) {
        Inflater inflater = new Inflater(nowrap);
        Deflater deflater = new Deflater(0, nowrap);
        for (int strategy : new int[] {0, 1, 2}) {
          deflater.setStrategy(strategy);
          // Strategy 2 does not have the concept of levels, so vacuously call it 1.
          List<Integer> levelsToSearch = levelsByStrategy.get(strategy);
          for (int levelIndex = 0; levelIndex < levelsToSearch.size(); levelIndex++) {
            int level = levelsToSearch.get(levelIndex);
            deflater.setLevel(level);
            inflater.reset();
            deflater.reset();
            compressedDataIn.reset();
            matchingCompressedDataIn.reset();
            try {
              if (matches(
                  compressedDataIn, inflater, deflater, matchingCompressedDataIn, copyBuffer)) {
                return JreDeflateParameters.of(level, strategy, nowrap);
              }
            } catch (ZipException e) {
              // Parse error in input. The only possibilities are corruption or the wrong nowrap.
              // Skip all remaining levels and strategies.
              levelIndex = levelsToSearch.size();
              strategy = 2;
            }
          } // end of iteration on level
        } // end of iteration on strategy
      } // end of iteration on nowrap
    } finally {
      try {
        compressedDataIn.close();
      } catch (Exception ignored) {
        // Don't care.
      }
      try {
        matchingCompressedDataIn.close();
      } catch (Exception ignored) {
        // Don't care.
      }
    }
    return null;
  }

  /**
   * Check if the specified deflater will produce the same compressed data as the byte stream in
   * compressedDataIn and returns true if so.
   * @param compressedDataIn the stream of compressed data to read and reproduce
   * @param inflater the inflater for uncompressing the stream
   * @param deflater the deflater for recompressing the output of the inflater
   * @param matchingStreamInput an independent but identical view of the bytes in compressedDataIn
   * @param copyBuffer buffer to use for copying bytes between the inflater and the deflater
   * @return true if the specified deflater reproduces the bytes in compressedDataIn, otherwise
   * false
   * @throws IOException if anything goes wrong; in particular, {@link ZipException} is thrown if
   * there is a problem parsing compressedDataIn
   */
  private boolean matches(
      InputStream compressedDataIn,
      Inflater inflater,
      Deflater deflater,
      InputStream matchingStreamInput,
      byte[] copyBuffer)
      throws IOException {
    MatchingOutputStream matcher = new MatchingOutputStream(matchingStreamInput, 32768);
    // This stream will deliberately be left open because closing it would close the
    // underlying compressedDataIn stream, which is not desired.
    InflaterInputStream inflaterIn = new InflaterInputStream(compressedDataIn, inflater, 32768);
    DeflaterOutputStream out = new DeflaterOutputStream(matcher, deflater, 32768);
    int numRead = 0;
    try {
      while ((numRead = inflaterIn.read(copyBuffer)) >= 0) {
        out.write(copyBuffer, 0, numRead);
      }
      // When done, all bytes have been successfully recompressed. For sanity, check that
      // the matcher has consumed the same number of bytes and arrived at EOF as well.
      out.finish();
      out.flush();
      matcher.expectEof();
      // At this point the data in the compressed output stream was a perfect match for the
      // data in the compressed input stream; the answer has been found.
      return true;
    } catch (MismatchException e) {
      // Fast-fail case when the compressed output stream doesn't match the compressed input
      // stream. These are not the parameters you're looking for!
    } finally {
      try {
        out.close();
      } catch (Exception ignored) {
        // Don't care.
      }
    }
    return false;
  }
}
